First, import and tidy data:

# Read the DOHMH HIV/AIDS annual report, normalise the column names with
# janitor, and rename the `uhf` column to `neighborhood`.
hiv_data <- read_csv("./data/DOHMH_HIV_AIDS_Annual_Report.csv") %>%
  janitor::clean_names() %>%
  rename(neighborhood = uhf) %>%
  # Drop the rows labelled "All" in any of these four columns.
  # NOTE(review): year is compared against "ALL" (upper case) while the other
  # columns use "All" -- confirm this matches the labels in the data.
  filter(year != "ALL") %>%
  filter(borough != "All") %>%
  filter(neighborhood != "All") %>%
  filter(gender != "All") %>%
  # Store year as text and age as a factor.
  mutate(
    year = as.character(year),
    age = as.factor(age)
  )

Gender and neighborhood vs. HIV diagnoses:

# Sum HIV diagnoses for every neighborhood/gender pair (rows with age "All"
# are excluded) and draw one point per pair, coloured by gender.
# Neighborhoods are ordered with reorder() on sum_hiv.
neb_plot <- hiv_data %>%
  filter(age != "All") %>%
  group_by(neighborhood, gender) %>%
  summarize(sum_hiv = sum(hiv_diagnoses)) %>%
  ggplot(
    mapping = aes(
      x = reorder(neighborhood, sum_hiv),
      y = sum_hiv,
      color = gender
    )
  ) +
  geom_point() +
  # Flip the axes so the neighborhoods run along the vertical axis.
  coord_flip() +
  labs(
    title = "Gender and Neighborhood Influence on HIV Incidence",
    x = "Neighborhood",
    y = "HIV diagnoses",
    caption = "Data from the ..."
  )

# Convert the ggplot object to a plotly figure.
ggplotly(neb_plot)
# Sum HIV diagnoses for every gender/age pair (rows with age "All" are
# excluded) and draw them as side-by-side bars, one fill colour per gender.
age_plot <- hiv_data %>%
  filter(age != "All") %>%
  group_by(gender, age) %>%
  summarize(sum_hiv = sum(hiv_diagnoses)) %>%
  ggplot(mapping = aes(x = age, y = sum_hiv, fill = gender)) +
  # geom_col() plots the y values as given (same as stat = "identity").
  geom_col(alpha = 0.8, position = "dodge") +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    title = "Gender and Age Influence on HIV Incidence",
    x = "Age range",
    y = "HIV diagnoses",
    caption = "Data from the ..."
  )

# Convert the ggplot object to a plotly figure.
ggplotly(age_plot)
# Sum HIV diagnoses for every gender/race pair (rows with race "All" are
# excluded) and draw them as side-by-side bars, with races ordered by
# reorder() on sum_hiv.
race_plot <- hiv_data %>%
  filter(race != "All") %>%
  group_by(gender, race) %>%
  summarize(sum_hiv = sum(hiv_diagnoses)) %>%
  ggplot(
    mapping = aes(
      x = reorder(race, sum_hiv),
      y = sum_hiv,
      fill = gender
    )
  ) +
  # geom_col() plots the y values as given (same as stat = "identity").
  geom_col(alpha = 0.8, position = "dodge") +
  # NOTE(review): only two fill colours are supplied, which assumes gender
  # has exactly two values at this point -- confirm against the data.
  scale_fill_manual(values = c("#E69F00", "#56B4E9")) +
  labs(
    title = "Race and Gender Influence on HIV Incidence",
    x = "Race",
    y = "HIV diagnoses",
    caption = "Data from the ..."
  )

# Convert the ggplot object to a plotly figure.
ggplotly(race_plot)

HIV diagnoses by year among young men (ages 20-29) in the Bronx:

# Yearly HIV diagnoses among males aged "20 - 29" in the Bronx, one line per
# neighborhood.
year_plot <- hiv_data %>%
  # year is stored as text in hiv_data; make it an integer for the x axis.
  mutate(year = as.integer(year)) %>%
  filter(
    borough == "Bronx",
    gender == "Male",
    age == "20 - 29"
  ) %>%
  group_by(year, neighborhood) %>%
  summarise(sum_hiv = sum(hiv_diagnoses)) %>%
  ggplot(mapping = aes(x = year, y = sum_hiv, color = neighborhood)) +
  geom_line()

# Convert the ggplot object to a plotly figure.
ggplotly(year_plot)
# Linear model of HIV diagnoses on borough, gender and age.
# The columns are looked up through `data =` instead of being written as
# `hiv_data$...` inside the formula: the fitted values are the same, but the
# coefficient names are no longer prefixed with `hiv_data$` and
# predict(fit, newdata = ...) can find the predictors in new data.
# NOTE(review): hiv_data is not filtered on age, so any age == "All" rows
# (excluded in the plots) enter the model as their own factor level --
# confirm this is intended.
fit <- lm(hiv_diagnoses ~ borough + gender + age, data = hiv_data)